Importing the necessary libraries and data
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
# Load the Titanic workbook into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# only runs where the file exists at exactly this location.
titanic = pd.read_excel("C:\\Users\\mawul\\Downloads\\titanic homework.xlsx")
# Preview the first rows to confirm the columns loaded as expected.
titanic.head()
| | pclass | survived | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | boat | body | home.dest |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | Allen, Miss. Elisabeth Walton | female | 29.0000 | 0 | 0 | 24160 | 211.3375 | B5 | S | 2 | NaN | St Louis, MO |
| 1 | 1 | 1 | Allison, Master. Hudson Trevor | male | 0.9167 | 1 | 2 | 113781 | 151.5500 | C22 C26 | S | 11 | NaN | Montreal, PQ / Chesterville, ON |
| 2 | 1 | 0 | Allison, Miss. Helen Loraine | female | 2.0000 | 1 | 2 | 113781 | 151.5500 | C22 C26 | S | NaN | NaN | Montreal, PQ / Chesterville, ON |
| 3 | 1 | 0 | Allison, Mr. Hudson Joshua Creighton | male | 30.0000 | 1 | 2 | 113781 | 151.5500 | C22 C26 | S | NaN | 135.0 | Montreal, PQ / Chesterville, ON |
| 4 | 1 | 0 | Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | female | 25.0000 | 1 | 2 | 113781 | 151.5500 | C22 C26 | S | NaN | NaN | Montreal, PQ / Chesterville, ON |
Horizontal bar chart for number of passengers survived by class
# Sum the 'survived' column within each passenger class and turn the
# grouped result back into a flat two-column DataFrame.
survived_by_class = (
    titanic
    .groupby('pclass')['survived']
    .sum()
    .reset_index()
)

# Plot the per-class totals as horizontal bars with Plotly Express.
fig = px.bar(
    survived_by_class,
    y='pclass',
    x='survived',
    orientation='h',
    title='Number of Passengers Survived by Class',
    labels={
        'pclass': 'Passenger Class',
        'survived': 'Number of Passengers Survived',
    },
)

# Render the figure in the notebook.
fig.show()
Vertical bar chart for passengers survived by class and gender
# Sum the 'survived' column for every (class, gender) pair and flatten the
# grouped result into a DataFrame with one row per pair.
survived_by_class_gender = (
    titanic
    .groupby(['pclass', 'sex'])['survived']
    .sum()
    .reset_index()
)

# Vertical bars per class, colored by gender, with classes ordered 1, 2, 3.
fig = px.bar(
    survived_by_class_gender,
    x='pclass',
    y='survived',
    color='sex',
    category_orders={'pclass': [1, 2, 3]},
    title='Number of Passengers Survived by Class and Gender',
    labels={
        'pclass': 'Passenger Class',
        'survived': 'Number of Passengers Survived',
    },
)

# Render the figure in the notebook.
fig.show()
Stacked Chart of Survived vs Dead Passengers for different classes and gender
# Count the passengers in every (class, gender, survival status) combination.
survival_by_class_gender = (
    titanic
    .groupby(['pclass', 'sex', 'survived'])
    .size()
    .reset_index(name='Count')
)

# Replace the numeric flag with text so Plotly treats the color dimension as
# discrete categories (distinct stacked segments plus a legend) instead of
# drawing a continuous color scale.
# Assumes 'survived' is coded 0 = died, 1 = survived, as in the data preview.
survival_by_class_gender['survived'] = survival_by_class_gender['survived'].map(
    {0: 'Dead', 1: 'Survived'}
)

# Stacked bar chart, one facet per gender. Keys in `labels` must match the
# DataFrame column names exactly (case-sensitive), hence 'pclass'.
fig = px.bar(
    survival_by_class_gender,
    x='pclass',
    y='Count',
    color='survived',
    facet_col='sex',
    category_orders={'pclass': [1, 2, 3]},
    labels={
        'Count': 'Number of Passengers',
        'pclass': 'Passenger Class',
        'survived': 'Survival Status',
    },
    title='Survived vs. Dead for Different Classes and Genders',
)

# Render the figure in the notebook.
fig.show()
Histogram of fares paid for passengers in cabin 30 (B30, C30 and D30)
# Keep only passengers whose cabin string starts with B30, C30 or D30.
# The match is on the first three characters of 'cabin'; rows with a missing
# cabin do not match and are dropped.
cabins = ['B30', 'C30', 'D30']
passengers = titanic[titanic['cabin'].str[:3].isin(cabins)]

# Histogram of the fares paid by the selected passengers. Keys in `labels`
# must match the column name exactly (case-sensitive), hence 'fare'.
fig = px.histogram(
    passengers,
    x='fare',
    nbins=20,
    title='Histogram of Fares for Passengers in Cabins B30, C30, and D30',
    labels={'fare': 'Fare', 'count': 'Number of Passengers'},
)

# Render the figure in the notebook.
fig.show()
Box Plot of fares by passenger class
# Box plot of fare by passenger class, with every individual passenger drawn
# as a point next to the box. Keys in `labels` must match the column names
# exactly (case-sensitive), so they are 'pclass' and 'fare'.
fig = px.box(
    titanic,
    x='pclass',
    y='fare',
    points='all',
    title='Box Plot of Fares by Passenger Class',
    labels={'pclass': 'Passenger Class', 'fare': 'Fare'},
)

# Render the figure in the notebook.
fig.show()
Box Plot of the ages by passenger class
# Box plot of age by passenger class, with every individual passenger drawn
# as a point next to the box. Keys in `labels` must match the column names
# exactly (case-sensitive), so they are 'pclass' and 'age'.
fig = px.box(
    titanic,
    x='pclass',
    y='age',
    points='all',
    title='Box Plot of Age by Passenger Class',
    labels={'pclass': 'Passenger Class', 'age': 'Age'},
)

# Render the figure in the notebook.
fig.show()